# Import packages and load dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import math
import os
import re
import time
master_tm = time.time()
%matplotlib inline
import warnings
warnings.simplefilter('ignore')
from IPython.core.display import display, HTML
from IPython.display import Image
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from matplotlib import cm
from statsmodels.formula.api import ols
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.cluster import KMeans
try:
import pydot
except:
!pip install pydot
import pydot
# Load the bike-sharing workbook from the current working directory and take
# a first look at its schema and summary statistics.
df = pd.read_excel('Bike_Sharing2.xlsx')
df
print(df.info())
df.describe()
## Data Cleaning
# Drop bookkeeping columns that carry no modeling information
# (assumes 'S_No', 'index' and 'Date' exist in the workbook — the 'time'
# column keeps the timestamp).
df.drop(columns=['S_No','index','Date'],inplace=True)
# Rename column names for readability
df.rename(columns= {'Time':'time',
                    'yr':'year',
                    'mnth':'month',
                    'hr':'hour',
                    'holiday':'is_holiday',
                    'weekday':'day_of_week',
                    'workingday':'is_working_day',
                    'weathersit':'weather_type',
                    'atemp':'app_temp',
                    'hum':'humidity',
                    'windspeed':'wind_speed'}, inplace=True)
# Formatting and extracting datetime data
# Derive a day-of-month column from the timestamp (assumes read_excel parsed
# 'time' as datetime — TODO confirm against the workbook).
df.insert(4, 'day', df['time'].dt.day)
print(df.isna().sum())
# Keep a pristine copy for the classification and clustering sections later.
df_master = df.copy()
df_master
# Further Data Cleaning
# Build the regression working frame: drop incomplete rows, collapse the two
# user counts into a single 'total_users' target, and force weather_type to int.
df_regression = df.dropna(axis=0, how='any').reset_index(drop=True)
df_regression['total_users'] = df_regression['casual'] + df_regression['registered']
df_regression = df_regression.drop(columns=['casual', 'registered'])
df_regression['weather_type'] = df_regression['weather_type'].astype('int64')
print(df_regression.isna().sum())
df_regression
# Univariate Analysis - Understanding distribution and shape of each variable
id_vars = ['time']
num_vars = list(df_regression.columns)[-5:]   # numeric columns (last five)
cat_vars = list(df_regression.columns)[1:-5]  # everything between 'time' and the numerics
for col in list(df_regression.columns)[1:]:
    f, axes = plt.subplots(1, 2, figsize=(12, 4))
    if col in cat_vars:
        # FIX: value_counts() was recomputed up to four times per column;
        # compute it once and reuse it.
        counts = df_regression[col].value_counts()
        axes[0].bar(x=list(counts.index), height=list(counts.values))
        # Round the y-limits to a "nice" step: 500 for the low-cardinality
        # variables, 50 for the high-cardinality calendar variables.
        # NOTE(review): 'week' is not a column name (no match ever); probably
        # a typo for 'day_of_week', which is handled by the 50-step branch.
        if col in ['season','year','week','is_holiday','is_working_day','weather_type']:
            step = 500
        elif col in ['month','day','hour','day_of_week']:
            step = 50
        else:
            step = None
        if step is not None:
            y_min = int(math.floor(counts.min()/step))*step
            y_max = int(math.ceil(counts.max()/step))*step
            axes[0].set_ylim([y_min, y_max])
    else:
        df_regression[col].hist(ax=axes[0], grid=False)
    axes[0].set_title('Distribution of \''+ col + '\'')
    df_regression.boxplot(column=col, ax=axes[1], grid=False)
    plt.tight_layout()
    plt.show()
# Bi-Variate Analysis - Understanding pairwise relationships between variables
# Full pairplot over every column except 'time'; this is slow, hence the timer.
start = time.time()
sns.pairplot(df_regression.iloc[:,1:])
print('Runtime:',time.time()-start,'seconds')
# Creating categorical dummy variables
quantitative_data = df_regression[num_vars].copy()
# BUG FIX: astype returns a new frame; the original called it and discarded
# the result, so the cast never applied. Assign the result instead.
categorical_data = df_regression[cat_vars].astype('category')
dummy = pd.get_dummies(categorical_data, columns=list(categorical_data.columns))
df_regression = pd.concat([quantitative_data, dummy], axis=1)
# Move the target 'total_users' to the front for the modeling sections below.
df_regression = pd.concat([df_regression.pop('total_users'), df_regression], axis=1)
df_regression
# Correlation Matrix (Heatmap)
# Pearson correlations among the target and the four numeric predictors
# (the first five columns after the reordering above).
corr = df_regression.iloc[:,:5].corr(method='pearson')
plt.figure(figsize=(15, 15))
sns.heatmap(corr, annot=True, cmap="YlGnBu",cbar_kws={'label': 'Correlation'})
# NOTE(review): tight_layout runs before the tick relabeling below, so the
# layout may not account for the rotated labels — confirm intended order.
plt.tight_layout()
plt.xticks(rotation=45, horizontalalignment='right')
plt.yticks(rotation=0)
plt.show()
plt.clf()
# Dropping highly correlated apparent temperature variable
# (flagged against 'temp' in the heatmap above).
df_regression.drop(columns=['app_temp'], inplace=True)
df_regression
# Sanitize column names so every one is a valid identifier for the ols
# formula interface (patsy chokes on non-alphanumeric characters).
df = df_regression.copy()
y = list(df.columns)[0]  # target column name
rename_columns = {}
for column_name in list(df.columns):
    ## Go through each column name and check if it contains non-alphanumeric
    # FIX: regex patterns are now raw strings — '\W' in a plain string is an
    # invalid escape sequence (SyntaxWarning on Python 3.12+).
    if re.search(r'\W', column_name):
        ## Remove non-alphanumeric at the start or end of the column names or replace with '_' if in the middle
        new_name = re.sub(r'^\W+', '', column_name)
        new_name = re.sub(r'\W+$', '', new_name)
        rename_columns[column_name] = re.sub(r'\W+', '_', new_name)
df.rename(columns=rename_columns,
          inplace=True)
# Build the patsy formula 'y ~ x1 + x2 + ...': join all columns with ' + '
# then swap only the first '+' (between target and first predictor) for '~'.
modeleq = ' + '.join(list(df.columns)).replace('+', '~', 1)
print('\nModel equation:', modeleq, '\n')
maxR2 = -np.inf           # best adjusted R^2 seen so far
bmodeleq = modeleq        # formula of the best model so far
numx = df.shape[1] - 1    # number of predictors currently in the model
x1x2 = False #interaction variables not yet included
df2 = df.copy()           # backup frame used to restart the elimination
#print(ols(modeleq, df).fit().summary2())
# Backward stepwise elimination on adjusted R^2: repeatedly drop the predictor
# with the smallest |t|-statistic while tracking the best formula seen. When a
# single predictor remains, restart once from the best model found (x1x2 flag
# marks the second pass), prune collinear predictors, then eliminate again and
# stop for good when one predictor is left on the second pass.
while True:
    regout = ols(modeleq, df).fit()
    R2 = regout.rsquared_adj
    if R2 > maxR2:
        # New best model: remember its formula.
        maxR2 = R2
        bmodeleq = modeleq
    print('\nAdjusted R2 =', R2, 'for', numx, 'Xs.')
    if numx == 1:
        print('Variable left:', modeleq[modeleq.find('~') + 2 :])
        if x1x2:
            #one xvar left
            #get out of 'while' loop:
            break
        else:
            #add interaction variables for original untransformed variables in best model so far
            # NOTE(review): no interaction ('_x_') columns are actually created
            # anywhere in this cell, so the '_x_' filter below never matches;
            # presumably adapted from a template that generated interaction
            # terms — verify intent.
            numx = bmodeleq.count('+') + 1
            print('\nRestarting from best model (with', numx, 'Xs) found so far...')
            colname = bmodeleq.replace('~', '+').split(' + ')
            df = df2[colname]
            colname = colname[1:] #remove y
            df2 = df.copy()
            #delete any x too highly correlated with another x, to avoid collinearity
            corv = pd.DataFrame() #start empty dataframe for corr(Xs, y) to come
            for x in list(df)[1:]:
                #during 1st time thru loop: new column, with label, created in empty dataframe:
                #during subsequent time thru loop: new row, with row label, added to dataframe:
                corv.loc[x, y] = df[x].corr(df[list(df)[0]])
            corv = corv.loc[abs(corv).sort_values([y]).index, :] #corr(Xs, y) ranked
            delta = 0.005 #corr difference lower limit
            dl2 = []
            icorr = True
            while icorr:
                # Adjacent rows of the ranked table with similar |corr(x, y)|
                # are the candidate collinear pairs checked below.
                a = abs(corv).diff() <= delta #adjacent rows with similar abs(corr(Xs, y))
                colname = list(df)[1:]
                dl = []
                print('\nX pairs with correlations >', 1 - delta, ':')
                for b in range(1, a.shape[0]):
                    if a.iloc[b, 0]:
                        if abs(df[a.index[b - 1]].corr(df[a.index[b]])) > 1 - delta:
                            #deleting 1 X from correlated pair:
                            dv0 = a.index[b - 1]
                            dv1 = a.index[b]
                            #neither should already be deleted:
                            if not (dv0 in dl) and not (dv1 in dl):
                                #delete x with rather lower corr(x, y):
                                if abs(corv.loc[dv0, y]) - abs(corv.loc[dv1, y]) >= delta:
                                    d = dv1
                                elif len(dv0) < len(dv1): #delete x with longer name:
                                    d = dv1
                                else:
                                    d = dv0
                                dl.append(d) #for en masse deletion later
                                corv.drop([d], axis=0, inplace=True) #delete from column of corr with y
                                print(dv0,',',dv1)
                if len(dl) > 0:
                    df.drop(axis=1, columns=dl, inplace=True) #variables deleted en masse
                    dl2 = dl2 + dl #keep for real deletion later
                    print('\n' + str(len(dl)), 'variables considered for deletion:')
                    print('\n'.join([str(x) for x in dl]))
                else:
                    print('(no more)')
                    icorr = False
            dl2 = [x for x in dl2 if x.find('_x_') != -1] #only interaction variables kept
            df2.drop(axis=1, columns=dl2, inplace=True) #collinear interaction variables deleted en masse, for real
            #remaining Xs may be collinear
            print('\n' + str(len(dl2)) + ' interaction variables deleted.')
            #potential collinearity issues handled
            modeleq = ' + '.join(list(df2)).replace('+', '~', 1)
            numx = df2.shape[1] - 1
            x1x2 = True #interaction variables already included
            #beyond-pairwise collinearity may still be introduced with the interaction variables
            df = df2.copy() #ready for continuing deletion
            continue
    #identify X variable to delete by finding the one with smallest abs(t-stat):
    t = regout.tvalues[1:]
    xdrop = list(t[abs(t) == min(abs(t))].index)[0]
    print('Variable to drop:', xdrop)
    df.drop(xdrop, axis=1, inplace=True)
    modeleq = ' + '.join(list(df)).replace('+', '~', 1)
    numx = numx - 1
# Refit the winning specification on the post-pruning data and print the
# full statsmodels report for it.
out = ols(bmodeleq, df2).fit()
numx = 1 + bmodeleq.count('+')
print('\nBest model has', numx, 'Xs:')
print(out.summary2())
# Split training and testing set for ols model after variable selection
data = df2.copy()
x1 = data.iloc[:,1:]   # selected predictors
y1 = data.iloc[:,0:1]  # target: total_users
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1,
                                                        test_size=0.3,
                                                        shuffle=True,
                                                        random_state=42)
training_set = pd.concat([y1_train, x1_train], axis=1)
training_set
rmse_comparison = []  # collects the test RMSE of every regression model below
reg_eqn = ' + '.join(list(df2.columns)).replace('+', '~', 1)
print('Model equation:\n', reg_eqn)
reg_model = ols(reg_eqn, training_set).fit()
y_pred = reg_model.predict(x1_test)
# FIX: scikit-learn's convention is (y_true, y_pred). Squared error is
# symmetric so the value is unchanged, but the conventional order reads right.
rmse = np.sqrt(mean_squared_error(y1_test, y_pred))
rmse_comparison.append(rmse)
print('\nRMSE (Linear Regression) =', rmse)
# Split training and testing set for the remaining regression models
data = df_regression.copy()
x = data.iloc[:,1:]   # all dummy/numeric predictors
y = data.iloc[:,0:1]  # target: total_users
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.3,
                                                    shuffle=True,
                                                    random_state=42)
# FIX: the 'normalize' keyword was deprecated in scikit-learn 0.24 and removed
# in 1.2, so passing it now raises TypeError. normalize=False was the default,
# so dropping it leaves the fitted model identical.
linear_regr = LinearRegression(fit_intercept=True,
                               copy_X=True,
                               n_jobs=-1)
linear_regr.fit(x_train, y_train)
y_pred = linear_regr.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse_comparison.append(rmse)
print('RMSE (Linear Regression) =',rmse)
# Sweep the L1 penalty strength and keep the alpha with the lowest test RMSE.
alpha = np.linspace(0,10,1001)
rmse_all = []
for i in alpha:
    # FIX: 'normalize' was removed in scikit-learn 1.2 (False was the default,
    # so behavior is unchanged).
    # NOTE(review): alpha=0 with Lasso is discouraged by scikit-learn (use
    # LinearRegression instead); that grid point emits a warning.
    lasso_regr = Lasso(alpha=i,
                       fit_intercept=True,
                       copy_X=True,
                       max_iter=100,
                       tol=0.01,
                       random_state=42)
    lasso_regr.fit(x_train, y_train)
    y_pred = lasso_regr.predict(x_test)
    rmse_all.append(np.sqrt(mean_squared_error(y_test, y_pred)))
index = np.argmin(rmse_all)
best_alpha = alpha[index]
best_rmse = rmse_all[index]
rmse_comparison.append(best_rmse)
plt.plot(alpha, rmse_all)
plt.xlabel("alpha")
plt.ylabel("rmse")
plt.show()
print('Best alpha =', best_alpha)
print('Best RMSE (Lasso Regression) =', best_rmse)
# Sweep the L2 penalty strength and keep the alpha with the lowest test RMSE.
alpha = np.linspace(0,10,1001)
rmse_all = []
for i in alpha:
    # FIX: 'normalize' was removed in scikit-learn 1.2 (False was the default,
    # so behavior is unchanged).
    ridge_regr = Ridge(alpha=i,
                       fit_intercept=True,
                       copy_X=True,
                       max_iter=100,
                       tol=0.01,
                       random_state=42)
    ridge_regr.fit(x_train, y_train)
    y_pred = ridge_regr.predict(x_test)
    rmse_all.append(np.sqrt(mean_squared_error(y_test, y_pred)))
index = np.argmin(rmse_all)
best_alpha = alpha[index]
best_rmse = rmse_all[index]
rmse_comparison.append(best_rmse)
plt.plot(alpha, rmse_all)
plt.xlabel("alpha")
plt.ylabel("rmse")
plt.show()
print('Best alpha =', best_alpha)
print('Best RMSE (Ridge Regression) =', best_rmse)
# Exhaustive grid search over alpha (penalty strength) and l1_ratio (L1/L2 mix).
enet_tm = time.time()
alpha = np.linspace(0,5,501)
ratio = np.linspace(0,1,11)
best_alpha = None
best_ratio = None
best_rmse = np.inf  # was 1e+8; infinity is the idiomatic "nothing found yet"
for i in alpha:
    for j in ratio:
        # FIX: 'normalize' was removed in scikit-learn 1.2 (False was the
        # default, so behavior is unchanged).
        elastic_net = ElasticNet(alpha=i,
                                 l1_ratio=j,
                                 fit_intercept=True,
                                 max_iter=100,
                                 tol=0.01,
                                 copy_X=True,
                                 random_state=42)
        elastic_net.fit(x_train, y_train)
        y_pred = elastic_net.predict(x_test)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        if rmse < best_rmse:
            best_alpha = i
            best_ratio = j
            best_rmse = rmse
rmse_comparison.append(best_rmse)
print('Best alpha =', best_alpha)
print('Best ratio =', best_ratio)
print('Best RMSE (Elastic Net) =', best_rmse)
print('Runtime:', round((time.time()-enet_tm)/60,2), 'mins')
# Grid Search CV for best RFR model hyperparameters
rfr_tm = time.time()
rfr = RandomForestRegressor(random_state=42)
# FIX: max_features='auto' was deprecated in scikit-learn 1.1 and removed in
# 1.3. For regressors 'auto' meant "all features", i.e. 1.0, so this grid
# searches the same three settings.
parametersGrid = {
    'n_estimators': [100, 200, 500],
    'max_features': [1.0, 'sqrt', 'log2']
}
best_rfr = GridSearchCV(estimator=rfr, param_grid=parametersGrid, cv=5, n_jobs=-1)
# .values.ravel() passes y as 1-D, silencing the DataConversionWarning about
# a single-column DataFrame target.
best_rfr.fit(x_train, y_train.values.ravel())
print('Runtime:', round((time.time()-rfr_tm)/60,2), 'mins')
best_rfr.best_params_
y_pred = best_rfr.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse_comparison.append(rmse)
print('Best RMSE (Random Forest Regression) =', rmse)
# Comparing different linear model performances
# One row per model, in the order the RMSEs were appended above.
model_names = ['OLS (Variable Selection)',
               'Linear Regression',
               'Lasso Regression',
               'Ridge Regression',
               'ElasticNet Regression',
               'Random Forest Regression']
df_rmse = pd.DataFrame({'RMSE': rmse_comparison}, index=model_names)
df_rmse
# Selecting features that are relevant to total bike usage
# Keep every column from 'hour' onward, drop incomplete rows, derive the total
# usage count, and remove the columns the classifier should not see.
df_classifier = (df_master.loc[:, 'hour':]
                 .dropna(axis=0, how='any')
                 .reset_index(drop=True))
df_classifier['total_users'] = df_classifier['casual'] + df_classifier['registered']
df_classifier = df_classifier.drop(columns=['app_temp', 'casual', 'registered'])
df_classifier['weather_type'] = df_classifier['weather_type'].astype('int64')
df_classifier.describe()
# Visualizing Total User Count for feature engineering
plt.figure(figsize=(10,6))
plt.xlabel('total_users')
plt.title('Total Bike User Count')
# Three bins previewing the Low/Moderate/High traffic buckets created next.
df_classifier.total_users.plot.hist(bins=[0,200,600,1000])
# Feature Engineering (Creating 'Low', 'Moderate' and 'High' User Traffic from User Count)
# BUG FIX: the original per-row loop wrote through chained indexing
# (df['user_traffic'][i] = ...), which pandas flags as SettingWithCopy and
# which stops working under copy-on-write (default in pandas 3.0).
# np.select assigns the same labels in one vectorized pass:
# total_users < 200 -> 'Low', < 600 -> 'Moderate', otherwise 'High'.
df_classifier['user_traffic'] = np.select(
    [df_classifier['total_users'] < 200,
     df_classifier['total_users'] < 600],
    ['Low', 'Moderate'],
    default='High')
df_classifier.drop(columns=['total_users'], inplace=True)
# Move the new label column to the front.
df_classifier = pd.concat([df_classifier.pop('user_traffic'), df_classifier], axis=1)
print(df_classifier['user_traffic'].value_counts())
df_classifier
# Creating categorical dummy variables
cat_vars = list(df_classifier.columns)[1:-3]  # columns between target and the 3 numerics
num_vars = list(df_classifier.columns)[-3:]   # last three columns are numeric
quantitative_data = df_classifier[num_vars].copy()
# BUG FIX: astype returns a new frame; the original discarded the result, so
# the cast never applied. Assign the result instead.
categorical_data = df_classifier[cat_vars].astype('category')
dummy = pd.get_dummies(categorical_data, columns=list(categorical_data.columns))
df_classifier = pd.concat([df_classifier[['user_traffic']], quantitative_data, dummy], axis=1)
df_classifier
# Split train and test set for classification cross validation
x = df_classifier.iloc[:,1:].copy()   # dummy + numeric features
y = df_classifier.iloc[:,0:1].copy()  # target: user_traffic
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.3,
                                                    shuffle=True,
                                                    random_state=42)
# K-NN Classification
classifier_comparison = []  # collects test accuracy of each classifier below
knn_classifier = KNeighborsClassifier(n_neighbors=10,
                                      weights='distance',
                                      algorithm='auto',
                                      p=2,
                                      n_jobs=-1)
# .values.ravel() passes y as 1-D, silencing the column-vector warning.
knn_classifier.fit(x_train, y_train.values.ravel())
# FIX: the test-set score was computed twice (once printed, once stored);
# compute it once and reuse.
knn_test_score = knn_classifier.score(x_test, y_test)
print('Accuracy of fit =', knn_classifier.score(x_train, y_train))
print('Accuracy of prediction =', knn_test_score, '\n')
classifier_comparison.append(knn_test_score)
tab = pd.crosstab(knn_classifier.predict(x_test), y_test['user_traffic'], margins=True)
tab.index.name = 'Prediction'
tab
# Decision Tree Classification
decision_tree_classifier = DecisionTreeClassifier(criterion='gini',
                                                  splitter='best',
                                                  random_state=42)
decision_tree_classifier.fit(x_train, y_train)
# FIX: the test-set score was computed twice; compute once and reuse.
dt_test_score = decision_tree_classifier.score(x_test, y_test)
print('Accuracy of Fit (Decision Tree) =', decision_tree_classifier.score(x_train, y_train))
print('Accuracy of Prediction (Decision Tree) =', dt_test_score, '\n')
classifier_comparison.append(dt_test_score)
tab2 = pd.crosstab(decision_tree_classifier.predict(x_test), y_test['user_traffic'], margins=True)
tab2.index.name = 'Prediction'
tab2
# Tuning Max Depth hyperparameter to reduce model complexity and determining 'gini' vs 'entropy' criterion
# for best model performance
depth_range = np.arange(1,31)
gini_accuracy = []
entropy_accuracy = []
# For each depth, fit one tree per criterion and record its test accuracy.
for depth in depth_range:
    for criterion, scores in (('gini', gini_accuracy), ('entropy', entropy_accuracy)):
        tuned_tree = DecisionTreeClassifier(criterion=criterion,
                                            splitter='best',
                                            max_depth=depth,
                                            random_state=42)
        tuned_tree.fit(x_train, y_train)
        scores.append(tuned_tree.score(x_test, y_test))
plt.figure(figsize=(12,8))
plt.plot(depth_range, gini_accuracy, label='gini')
plt.plot(depth_range, entropy_accuracy, label='entropy')
plt.title('Max Depth vs Accuracy')
plt.xlabel('Max Depth')
plt.ylabel('Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
# Comparing Accuracy Scores across both models for different Max Depth levels
df_pruning = pd.DataFrame({'Max Depth': depth_range,
                           'Gini Accuracy': gini_accuracy,
                           'Entropy Accuracy': entropy_accuracy})
df_pruning
# Tuning Minimum Samples required for a split to prevent model overfitting
gini_accuracy2 = []
min_split_range = np.arange(2,101)
for i in min_split_range:
    dt_classifier = DecisionTreeClassifier(criterion='gini',
                                           splitter='best',
                                           max_depth=18,
                                           min_samples_split=i,
                                           random_state=42)
    dt_classifier.fit(x_train, y_train)
    gini_accuracy2.append(dt_classifier.score(x_test, y_test))
plt.figure(figsize=(12,8))
plt.plot(min_split_range, gini_accuracy2, label='gini')
# BUG FIX: title/xlabel were copy-pasted from the max-depth sweep above; this
# plot varies min_samples_split, not max depth.
plt.title('Minimum Samples Split vs Accuracy')
plt.xlabel('Minimum Samples Split')
plt.ylabel('Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
# Comparing Model Accuracy across different Minimum Samples levels
df_pruning2 = pd.DataFrame({'Minimum Splits': min_split_range,
                            'Gini Accuracy': gini_accuracy2})
df_pruning2.head(50)
# Building Pruned Decision Tree
# max_depth=18 and min_samples_split=39 come from the two sweeps above.
dt_pruned = DecisionTreeClassifier(criterion='gini',
                                   splitter='best',
                                   max_depth=18,
                                   min_samples_split=39,
                                   random_state=42)
dt_pruned.fit(x_train, y_train)
# FIX: the test-set score was computed twice; compute once and reuse.
pruned_test_score = dt_pruned.score(x_test, y_test)
print('Accuracy of Fit (Decision Tree with Pruning) =', dt_pruned.score(x_train, y_train))
print('Accuracy of Prediction (Decision Tree with Pruning) =', pruned_test_score, '\n')
classifier_comparison.append(pruned_test_score)
tab3 = pd.crosstab(dt_pruned.predict(x_test), y_test['user_traffic'], margins=True)
tab3.index.name = 'Prediction'
tab3
# Decision Tree Visualization
target_classes = sorted(list(df_classifier.user_traffic.value_counts().index))
dot_data = export_graphviz(dt_pruned,
                           feature_names=list(df_classifier.columns)[1:],
                           class_names=target_classes,
                           label='all',
                           filled=True,
                           impurity=True,
                           rounded=True,
                           special_characters=True,
                           precision=3)
(graph,) = pydot.graph_from_dot_data(dot_data)
# BUG FIX: the original printed len(df_classifier) — the ROW count — as the
# number of classes; the class count is len(target_classes).
print('Classify \''+list(df_classifier.columns)[0]+'\' with',
      len(target_classes),
      'classes',
      str(target_classes))
Image(graph.create_png())
# Comparison of Classification Model Accuracy
# One row per classifier, in the order the scores were appended above.
classifier_names = ['K-NN Classification',
                    'Decision Tree Classification',
                    'Decision Tree Classification (with Pruning)']
df_accuracy = pd.DataFrame({'Accuracy': classifier_comparison}, index=classifier_names)
df_accuracy
# Data preparation
# Clustering works on the full master data: drop incomplete rows, remove the
# raw-temperature and timestamp columns, and force the count columns to int.
df_cluster = (df_master.dropna(axis=0, how='any')
              .reset_index(drop=True)
              .drop(columns=['temp']))
for col in ('casual', 'registered', 'weather_type'):
    df_cluster[col] = df_cluster[col].astype('int64')
df_cluster = df_cluster.drop(columns=['time'])
df_cluster
# Selecting features for Clustering
x = df_cluster.loc[:,'app_temp':'wind_speed'].copy()
x
# Elbow Method for K-Means Clustering
cluster_range = np.arange(1,11)
wcss_list = []
for i in cluster_range:
    # FIX: the 'n_jobs' keyword was deprecated in scikit-learn 0.23 and
    # removed in 1.0, so passing it now raises TypeError. n_init=10 pins the
    # historical default (newer versions default to n_init='auto').
    k_means = KMeans(n_clusters=i,
                     init='k-means++',
                     n_init=10,
                     copy_x=True,
                     random_state=42)
    k_means.fit(x)
    print('Cluster(s) =',str(i)+',','WCSS =',k_means.inertia_)
    wcss_list.append(k_means.inertia_)
plt.rcParams['figure.figsize'] = 15, 7
plt.plot(cluster_range, wcss_list)
plt.title('Elbow Method for choosing K-Means Clusters', fontweight='bold')
plt.xlabel('Number of Clusters')
plt.ylabel('Within-Cluster Sum of Squares')
plt.show()
# Performing K-Means Clustering with 3 clusters
# FIX: 'n_jobs' was removed from KMeans in scikit-learn 1.0; n_init=10 pins
# the historical default.
k_means = KMeans(n_clusters=3,
                 init='k-means++',
                 n_init=10,
                 copy_x=True,
                 random_state=42)
cluster_pred = pd.Series(k_means.fit_predict(x))
cluster_results = pd.DataFrame({'Cluster':cluster_pred.values})
cluster_results = pd.concat([df_cluster,cluster_results], axis=1)
print(cluster_results.Cluster.value_counts())
cluster_results
# Cluster Distribution
# Total casual/registered user counts per cluster.
cluster_dist = cluster_results.groupby(by=['Cluster'], as_index=False)
cluster_dist = cluster_dist[['casual','registered']].sum()
cluster_dist
# Plotting Casual and Registered user distribution across different clusters
plt.rcParams['figure.figsize'] = 12, 8
# Grouped bars: one (casual, registered) pair per cluster.
cluster_dist.iloc[:,1:].plot.bar()
plt.title('Casual and Registered user Population across different Clusters')
plt.xticks(rotation=0)
plt.xlabel('Cluster')
plt.ylabel('User Count')
plt.tight_layout()
plt.show()
# Full Visualization of Cluster Features of Casual and Registered users
f, axes = plt.subplots(6, 6, figsize=(36, 32))
for k in range(3):
temp = cluster_results[cluster_results['Cluster']==k]
for i, col in enumerate(['season']+list(temp.columns)[4:9]):
df_grouped = temp.groupby(by=[col], as_index=False)
df_grouped = df_grouped[['casual','registered']].sum()
axes[i,2*k].bar(x=df_grouped[col].tolist(),
height=df_grouped['casual'].to_list(),
color=cm.tab20.colors[2*i+1])
y_min = int(math.floor(np.min(df_grouped['casual'].to_list())/10000))*10000
y_max = int(math.ceil(np.max(df_grouped['casual'].to_list())/10000))*10000
axes[i,2*k].set_ylabel('User Count')
axes[i,2*k].set_ylim([y_min, y_max])
axes[i,2*k].set_title('\''+col+'\''+' (Cluster '+str(k+1)+', Casual)',
fontsize=18)
axes[i,2*k+1].bar(x=df_grouped[col].tolist(),
height=df_grouped['registered'].to_list(),
color=cm.tab20.colors[2*i])
y_min = int(math.floor(np.min(df_grouped['registered'].to_list())/10000))*10000
y_max = int(math.ceil(np.max(df_grouped['registered'].to_list())/10000))*10000
axes[i,2*k+1].set_ylim([y_min, y_max])
axes[i,2*k+1].set_title('\''+col+'\''+' (Cluster '+str(k+1)+', Registered)',
fontsize=18)
plt.subplots_adjust(wspace=0.05,hspace=0.05)
plt.tight_layout()
plt.show()
print('Total Notebook Runtime:',round((time.time()-master_tm)/60,2),'mins')